library(readxl)
library(dplyr)
library(stringr)
library(purrr)
library(lubridate)
library(data.table)
library(plm)
library(lmtest)
library(sandwich)
library(ggplot2)

#####################
## Data Processing ##
#####################
# vdem <- read.csv("V-Dem-CY-Full+Others-v14.csv")
navco <- read_excel("navco3-0full.xlsx")    

navco$COWcode <- navco$cowcode
navco <- navco %>%
  mutate(year = as.numeric(format(date, "%Y")))

vdem.sel <- vdem %>% 
  dplyr::select(country_id, country_name, year, COWcode, v2xel_locelec, 
                v2x_polyarchy, e_gdppc, e_pop) 

navco <- left_join(navco, vdem.sel, by = c("COWcode", "year"))

navco <- navco %>%
  mutate(
    num_partic_clean = num_partic_event %>%
      str_to_lower() %>%
      str_replace_all(",", "") %>%
      str_replace_all(" to ", "-") %>%
      str_replace_all("[–—−]", "-") %>%             
      str_replace_all("[^0-9\\-]", "") %>%
      str_replace_all("-{2,}", "-") %>%
      str_remove_all("^-|-$")
  ) %>%
  mutate(
    num_partic_numeric = case_when(
      is.na(num_partic_clean) | num_partic_clean %in% c("", "na", ".") ~ NA_real_,
      str_detect(num_partic_clean, "^\\d+$") ~ as.numeric(num_partic_clean),
      str_detect(num_partic_clean, "^\\d+-\\d+$") ~ map_dbl(num_partic_clean, function(x) {
        parts <- str_split(x, "-", simplify = TRUE)
        mean(as.numeric(parts))
      }),
      TRUE ~ NA_real_
    )
  )

navco <- navco %>%
  mutate(
    fatal_casu_clean = fatal_casu %>%
      str_to_lower() %>%
      str_replace_all(",", "") %>%
      str_replace_all(" to ", "-") %>%
      str_replace_all("[–—−]", "-") %>%          
      str_replace_all("[^0-9\\-]", "") %>%
      str_replace_all("-{2,}", "-") %>%
      str_remove_all("^-|-$")
  ) %>%
  mutate(
    fatal_casu_numeric = case_when(
      is.na(fatal_casu_clean) | fatal_casu_clean %in% c("", "na", ".") ~ NA_real_,
      str_detect(fatal_casu_clean, "^\\d+$") ~ as.numeric(fatal_casu_clean),
      str_detect(fatal_casu_clean, "^\\d+-\\d+$") ~ map_dbl(fatal_casu_clean, function(x) {
        parts <- str_split(x, "-", simplify = TRUE)
        mean(as.numeric(parts))
      }),
      TRUE ~ NA_real_
    )
  )

navco$fatalities <- navco$fatal_casu_numeric
navco$participants <- navco$num_partic_numeric 

navco <- navco %>%
  mutate(event_date = as.Date(date))

navco <- navco %>%
  arrange(COWcode, event_date)

setDT(navco)
setorder(navco, COWcode, event_date)

navco <- navco %>%
  arrange(COWcode, event_date) %>%
  group_by(COWcode) %>%
  mutate(cumulated_fatal_weighted = map_dbl(row_number(), function(i) {
    if (i > 1) {
      days_diff <- as.numeric(difftime(event_date[i], event_date[1:(i - 1)], units = "days"))
      weights <- 1 - (days_diff / max(days_diff))  
      weighted.mean(fatalities[1:(i - 1)], w = weights, na.rm = TRUE)
    } else {
      NA_real_
    }
  })) %>%
  ungroup()

# Define decay parameter
half_life <- 730
lambda <- log(2) / half_life

# Create exponential decay weighted average
navco <- navco %>%
  arrange(COWcode, event_date) %>%
  group_by(COWcode) %>%
  mutate(cumulated_fatal_weighted_exp = map_dbl(row_number(), function(i) {
    if (i > 1) {
      days_diff <- as.numeric(difftime(event_date[i], event_date[1:(i - 1)], units = "days"))
      weights_exp <- exp(-lambda * days_diff)
      weighted.mean(fatalities[1:(i - 1)], w = weights_exp, na.rm = TRUE)
    } else {
      NA_real_
    }
  })) %>%
  ungroup()

navco_meaned <- navco %>%
  group_by(COWcode, event_date) %>%
  dplyr::summarize(
    participants_mean = mean(participants, na.rm = TRUE),
    fatalities = mean(fatalities, na.rm = TRUE),
    v2x_polyarchy = mean(v2x_polyarchy, na.rm = TRUE),
    gdppc = mean(e_gdppc, na.rm = TRUE),
    pop = mean(e_pop, na.rm = TRUE),
    cumulated_fatal_weighted = mean(cumulated_fatal_weighted, na.rm = TRUE),
    cumulated_fatal_weighted_exp = mean(cumulated_fatal_weighted_exp, na.rm = TRUE),  
    v2xel_locelec = mean(v2xel_locelec, na.rm = TRUE),
    .groups = "drop"
  )

navco_final <- navco_meaned %>%
  arrange(COWcode, event_date) %>%
  group_by(COWcode) %>%
  mutate(
    fatality_diff_weighted = fatalities - cumulated_fatal_weighted,
    fatality_diff_weighted_exp = fatalities - dplyr::lag(cumulated_fatal_weighted_exp), 
    participants_lead = dplyr::lead(participants_mean)
  ) %>%
  ungroup() %>%
  mutate(
    fatality_diff_signed_weighted = sign(fatality_diff_weighted) * log1p(abs(fatality_diff_weighted)),
    fatality_diff_signed_weighted_exp = sign(fatality_diff_weighted_exp) * log1p(abs(fatality_diff_weighted_exp)), 
    participants_lead = log1p(participants_lead)
  )

navco_final <- navco_final %>% 
  mutate(fatalities_log = log1p(fatalities)) %>%
  mutate(gdppc = log1p(gdppc)) %>% 
  mutate(pop = log1p(pop)) 

pdata <- pdata.frame(navco_final, index = c("COWcode", "event_date"))

write.csv(pdata, "navco_country_final.csv", row.names = FALSE)
